# Start from an empty workspace: remove every object in the calling environment.
rm(list = ls())

###############################################################################
# GCI PROCESSING SCRIPT
## creates following data files: 
## - "candidate_GCI.RData"
## - "candidate_GCI_RB.RData"
###############################################################################

#load packages
library(countrycode)
library(WDI)

# set wd
# All file paths below are relative to the replication folder. The folder
# defaults to the original hard-coded location; set the REPLICATION_DIR
# environment variable to run the script from a different machine or folder.
replication_dir <- Sys.getenv(
  "REPLICATION_DIR",
  unset = "/Users/cbj4/Desktop/replication"
)
setwd(replication_dir)

##############################
#STEP 1: DATA LOAD AND CLEAN
##############################
# Read the raw input tables used throughout the script.
GCI <- read.csv(file = "raw_data/GCI_data.csv")
ITU <- read.csv(file = "raw_data/ITU.csv")
RB_ITU <- read.csv(file = "raw_data/RB_ITU.csv")
ITU_regions <- read.csv(file = "raw_data/ITU_regions.csv")

# Derive a "cown" country code from the country names in GCI and ITU.
# Names that countrycode() cannot match are coded 0 rather than NA.
# NOTE(review): all unmatched names therefore share the key 0 in the merges
# below -- confirm that no unmatched rows exist in either table.
GCI$ccode <- countrycode(
  sourcevar = GCI$country,
  origin = "country.name",
  destination = "cown",
  nomatch = 0
)
ITU$ccode <- countrycode(
  sourcevar = ITU$state,
  origin = "country.name",
  destination = "cown",
  nomatch = 0
)

# Keep only rows whose election year is after 2017 (rows with a missing
# election year are dropped as well).
ITU <- ITU[which(ITU$elect_year > 2017), , drop = FALSE]

# Left-join GCI onto the ITU rows by country code and year; every ITU row is
# kept even when no GCI record matches.
candidate_GCI <- merge(
  x = ITU,
  y = GCI,
  by.x = c("ccode", "elect_year"),
  by.y = c("ccode", "year"),
  all.x = TRUE
)

# Naive bivariate estimate: votes regressed on score.
LM1 <- lm(formula = votes ~ score, data = candidate_GCI)
summary(LM1)

##############################
#STEP 2: ADD ITU REGION DATA
##############################
# Code the region table with the same "cown" country code (0 when the name
# cannot be matched) so it can be joined on ccode.
ITU_regions$ccode <- countrycode(
  sourcevar = ITU_regions$country,
  origin = "country.name",
  destination = "cown",
  nomatch = 0
)

# Left-join the region table; rows without a region record are kept.
candidate_GCI <- merge(
  x = candidate_GCI,
  y = ITU_regions,
  by = "ccode",
  all.x = TRUE
)

##############################
#STEP 3: CHECK MODEL 1
##############################

# Region fixed-effects estimate: votes on ranking plus region dummies.
# This overwrites the naive LM1 fitted earlier.
LM1 <- lm(formula = votes ~ ranking + factor(region), data = candidate_GCI)
summary(LM1)

##############################
#STEP 4: ADD CONTROLS
##############################

# Download the World Bank indicators NY.GDP.PCAP.KD and SP.POP.TOTL for
# 2018-2022 (renamed to gdp_percap and pop after the merge below).
# For reference, the GNP label is NY.GNP.PCAP.CD (not used here).
controls <- WDI(
  country = "all",
  indicator = c("NY.GDP.PCAP.KD", "SP.POP.TOTL"),
  start = 2018,
  end = 2022,
  extra = FALSE,
  cache = NULL,
  latest = NULL,
  language = "en"
)

# Convert ISO3 codes to the "cown" country code used as the merge key.
controls$ccode <- countrycode(controls$iso3c, origin = "iso3c", destination = "cown")

# Manual override for Serbia (code 345). The override must be keyed on the
# ISO3 code: the previous condition compared the numeric ccode to "SRB", which
# can never be TRUE, so the override was never applied. %in% is used so that
# rows with a missing iso3c keep their existing ccode.
controls$ccode <- ifelse(controls$iso3c %in% "SRB", 345, controls$ccode)

# Left-join the controls by country code and election year.
candidate_GCI <- merge(candidate_GCI, controls,
                       by.x = c("ccode", "elect_year"),
                       by.y = c("ccode", "year"), all.x = TRUE)

# Rename the indicator columns by name rather than by position, so the result
# does not depend on how many columns the input tables or WDI() happen to have.
stopifnot(all(c("NY.GDP.PCAP.KD", "SP.POP.TOTL") %in% names(candidate_GCI)))
names(candidate_GCI)[names(candidate_GCI) == "NY.GDP.PCAP.KD"] <- "gdp_percap"
names(candidate_GCI)[names(candidate_GCI) == "SP.POP.TOTL"] <- "pop"

# add UNGA controls
# load() restores the objects saved in the file; the code below relies on it
# providing a data frame called dfAgree.
load(file = "raw_data/AgreementScoresAll_Jul2023.RData")
# Drop columns 1 and 6-9. In what remains, column 3 is renamed agree_* and
# column 5 idealpoint_* for each reference country below.
dfAgree <- dfAgree[, -c(1, 6:9)]

# For each reference country the same steps are applied: keep the rows where
# ccode1 is the reference country, rename the two measures, left-join them on
# (ccode2, year), and set both measures to 0 for the reference country itself.
# NOTE(review): if agree_* is an agreement share, a country's agreement with
# itself would normally be 1 rather than 0 -- confirm that 0 is intended.

## Agreement with US
dfAgree_US <- subset(dfAgree, ccode1 == 2)
names(dfAgree_US)[c(3, 5)] <- c("agree_US", "idealpoint_US")

candidate_GCI <- merge(
  x = candidate_GCI,
  y = dfAgree_US,
  by.x = c("ccode", "elect_year"),
  by.y = c("ccode2", "year"),
  all.x = TRUE
)
candidate_GCI$idealpoint_US <- with(candidate_GCI, ifelse(ccode == 2, 0, idealpoint_US))
candidate_GCI$agree_US <- with(candidate_GCI, ifelse(ccode == 2, 0, agree_US))

## Agreement with Russia
dfAgree_RU <- subset(dfAgree, ccode1 == 365)
names(dfAgree_RU)[c(3, 5)] <- c("agree_RU", "idealpoint_RU")

candidate_GCI <- merge(
  x = candidate_GCI,
  y = dfAgree_RU,
  by.x = c("ccode", "elect_year"),
  by.y = c("ccode2", "year"),
  all.x = TRUE
)
candidate_GCI$idealpoint_RU <- with(candidate_GCI, ifelse(ccode == 365, 0, idealpoint_RU))
candidate_GCI$agree_RU <- with(candidate_GCI, ifelse(ccode == 365, 0, agree_RU))

## Agreement with China
dfAgree_CH <- subset(dfAgree, ccode1 == 710)
names(dfAgree_CH)[c(3, 5)] <- c("agree_CH", "idealpoint_CH")

candidate_GCI <- merge(
  x = candidate_GCI,
  y = dfAgree_CH,
  by.x = c("ccode", "elect_year"),
  by.y = c("ccode2", "year"),
  all.x = TRUE
)
candidate_GCI$idealpoint_CH <- with(candidate_GCI, ifelse(ccode == 710, 0, idealpoint_CH))
candidate_GCI$agree_CH <- with(candidate_GCI, ifelse(ccode == 710, 0, agree_CH))

# Natural logs of the two WDI controls.
candidate_GCI$log_gdp_percap <- log(candidate_GCI$gdp_percap)
candidate_GCI$log_pop <- log(candidate_GCI$pop)

# Membership lists for the nato, eu, g77 and oic controls.
nato <- read.csv(file = "raw_data/nato_countries.csv")
eu <- read.csv(file = "raw_data/eu_countries.csv")
g77 <- read.csv(file = "raw_data/g77_countries.csv")
oic <- read.csv(file = "raw_data/oic_countries.csv")

# Code each list with the "cown" country code; unmatched names stay NA here.
# Note that the EU file stores its names in `countries`, the others in `country`.
eu$ccode <- countrycode(
  sourcevar = eu$countries, origin = "country.name",
  destination = "cown", nomatch = NA
)
nato$ccode <- countrycode(
  sourcevar = nato$country, origin = "country.name",
  destination = "cown", nomatch = NA
)
g77$ccode <- countrycode(
  sourcevar = g77$country, origin = "country.name",
  destination = "cown", nomatch = NA
)
oic$ccode <- countrycode(
  sourcevar = oic$country, origin = "country.name",
  destination = "cown", nomatch = NA
)

# Every listed country is a member, so the indicator is 1 on every row.
nato$nato_bin <- 1
eu$eu_bin <- 1
g77$g77_bin <- 1
oic$oic_bin <- 1

# Keep only the columns needed for the joins (selected by position).
nato <- nato[, 2:4]
eu <- eu[, 2:3]
g77 <- g77[, 3:4]
oic <- oic[, 2:3]

# Keep NATO rows whose year1 is before 2019 (presumably the accession
# year -- confirm against the raw file); rows with a missing year1 are dropped.
nato <- nato[which(nato$year1 < 2019), , drop = FALSE]

# Left-join the four membership tables on ccode, one after another.
candidate_GCI <- Reduce(
  function(acc, members) merge(acc, members, by = "ccode", all.x = TRUE),
  list(nato, eu, g77, oic),
  candidate_GCI
)

# Rows with no match in a membership table are non-members: recode NA to 0.
candidate_GCI$nato_bin <- replace(candidate_GCI$nato_bin, is.na(candidate_GCI$nato_bin), 0)
candidate_GCI$eu_bin <- replace(candidate_GCI$eu_bin, is.na(candidate_GCI$eu_bin), 0)
candidate_GCI$g77_bin <- replace(candidate_GCI$g77_bin, is.na(candidate_GCI$g77_bin), 0)
candidate_GCI$oic_bin <- replace(candidate_GCI$oic_bin, is.na(candidate_GCI$oic_bin), 0)

# Manual region override: rows with ccode 640 are assigned to region "B".
candidate_GCI$region <- with(candidate_GCI, ifelse(ccode == 640, "B", region))

##############################
#STEP 5: CHECK MODEL 2
##############################

# Full specification: ranking, region dummies, logged WDI controls, ideal-point
# measures relative to the US and Russia, and the four membership dummies.
LM3 <- lm(
  formula = votes ~ ranking + factor(region) + log_gdp_percap +
    log_pop + idealpoint_US + idealpoint_RU + nato_bin + eu_bin +
    g77_bin + oic_bin,
  data = candidate_GCI
)
summary(LM3)

# Pairwise correlations between the three agreement scores, computed on the
# rows where both scores in the pair are observed.
with(candidate_GCI, cor(agree_CH, agree_RU, use = "complete.obs"))
with(candidate_GCI, cor(agree_US, agree_RU, use = "complete.obs"))
with(candidate_GCI, cor(agree_US, agree_CH, use = "complete.obs"))

#######################################################################
# STEP 6: CREATE FINAL "candidate_GCI.RData" DATA SAVE AS RData
#######################################################################

#save(candidate_GCI, file = "data/candidate_GCI.RData")

#############################
# STEP 7: CREATE RB GCI DATA
#############################

# Code the RB table with the "cown" country code (0 when the name cannot be
# matched), then keep only rows whose election year is after 2017 (rows with a
# missing election year are dropped as well).
RB_ITU$ccode <- countrycode(
  sourcevar = RB_ITU$state,
  origin = "country.name",
  destination = "cown",
  nomatch = 0
)
RB_ITU <- RB_ITU[which(RB_ITU$elect.year > 2017), , drop = FALSE]

# Left-join GCI onto the RB rows by country code and year.
RBcandidate_GCI <- merge(
  x = RB_ITU,
  y = GCI,
  by.x = c("ccode", "elect.year"),
  by.y = c("ccode", "year"),
  all.x = TRUE
)

# Naive bivariate estimate: votes regressed on score (overwrites LM1).
LM1 <- lm(formula = votes ~ score, data = RBcandidate_GCI)
summary(LM1)

# Left-join the region table prepared in step 2.
RBcandidate_GCI <- merge(
  x = RBcandidate_GCI,
  y = ITU_regions,
  by = "ccode",
  all.x = TRUE
)

##############################
#STEP 8: CHECK MODEL 1
##############################
# Region fixed-effects estimate on the RB data (overwrites LM1 again).
LM1 <- lm(formula = votes ~ ranking + factor(region), data = RBcandidate_GCI)
summary(LM1)

##############################
#STEP 9: ADD CONTROLS
##############################

# add wdi controls
# Left-join the WDI table downloaded in step 4 by country code and year.
RBcandidate_GCI <- merge(RBcandidate_GCI, controls,
                         by.x = c("ccode", "elect.year"),
                         by.y = c("ccode", "year"), all.x = TRUE)

# Rename the indicator columns by name rather than by position, so the result
# does not depend on how many columns the RB table or WDI() happen to have.
stopifnot(all(c("NY.GDP.PCAP.KD", "SP.POP.TOTL") %in% names(RBcandidate_GCI)))
names(RBcandidate_GCI)[names(RBcandidate_GCI) == "NY.GDP.PCAP.KD"] <- "gdp_percap"
names(RBcandidate_GCI)[names(RBcandidate_GCI) == "SP.POP.TOTL"] <- "pop"

# log gdp variables
RBcandidate_GCI$log_gdp_percap <- log(RBcandidate_GCI$gdp_percap)
RBcandidate_GCI$log_pop <- log(RBcandidate_GCI$pop)

# add unga similarity controls
# Reuses the per-country agreement tables built in step 4. Each is left-joined
# on (ccode2, year), and both measures are set to 0 for the reference country
# itself.
# NOTE(review): if agree_* is an agreement share, self-agreement would normally
# be 1 rather than 0 -- confirm that 0 is intended.

# United States (ccode 2)
RBcandidate_GCI <- merge(
  x = RBcandidate_GCI,
  y = dfAgree_US,
  by.x = c("ccode", "elect.year"),
  by.y = c("ccode2", "year"),
  all.x = TRUE
)
RBcandidate_GCI$idealpoint_US <- with(RBcandidate_GCI, ifelse(ccode == 2, 0, idealpoint_US))
RBcandidate_GCI$agree_US <- with(RBcandidate_GCI, ifelse(ccode == 2, 0, agree_US))

# Russia (ccode 365)
RBcandidate_GCI <- merge(
  x = RBcandidate_GCI,
  y = dfAgree_RU,
  by.x = c("ccode", "elect.year"),
  by.y = c("ccode2", "year"),
  all.x = TRUE
)
RBcandidate_GCI$idealpoint_RU <- with(RBcandidate_GCI, ifelse(ccode == 365, 0, idealpoint_RU))
RBcandidate_GCI$agree_RU <- with(RBcandidate_GCI, ifelse(ccode == 365, 0, agree_RU))

# China (ccode 710)
RBcandidate_GCI <- merge(
  x = RBcandidate_GCI,
  y = dfAgree_CH,
  by.x = c("ccode", "elect.year"),
  by.y = c("ccode2", "year"),
  all.x = TRUE
)
RBcandidate_GCI$idealpoint_CH <- with(RBcandidate_GCI, ifelse(ccode == 710, 0, idealpoint_CH))
RBcandidate_GCI$agree_CH <- with(RBcandidate_GCI, ifelse(ccode == 710, 0, agree_CH))

# add nato, eu, g77, oic controls
# Left-join the four membership tables prepared in step 4, one after another.
RBcandidate_GCI <- Reduce(
  function(acc, members) merge(acc, members, by = "ccode", all.x = TRUE),
  list(nato, eu, g77, oic),
  RBcandidate_GCI
)

# Rows with no match in a membership table are non-members: recode NA to 0.
RBcandidate_GCI$nato_bin <- replace(RBcandidate_GCI$nato_bin, is.na(RBcandidate_GCI$nato_bin), 0)
RBcandidate_GCI$eu_bin <- replace(RBcandidate_GCI$eu_bin, is.na(RBcandidate_GCI$eu_bin), 0)
RBcandidate_GCI$g77_bin <- replace(RBcandidate_GCI$g77_bin, is.na(RBcandidate_GCI$g77_bin), 0)
RBcandidate_GCI$oic_bin <- replace(RBcandidate_GCI$oic_bin, is.na(RBcandidate_GCI$oic_bin), 0)

##############################
#STEP 10: CHECK MODEL 2
##############################
# Full specification on the RB data; same right-hand side as the step 5 model.
# This overwrites the LM3 fitted on candidate_GCI.
LM3 <- lm(
  formula = votes ~ ranking + factor(region) + log_gdp_percap +
    log_pop + idealpoint_US + idealpoint_RU + nato_bin + eu_bin +
    g77_bin + oic_bin,
  data = RBcandidate_GCI
)
summary(LM3)

#######################################################################
# STEP 11: CREATE FINAL "candidate_GCI_RB.RData" DATA SAVE AS RData
#######################################################################

#save(RBcandidate_GCI, file = "data/candidate_GCI_RB.RData")
